package com.xixudi.htmlto;


import com.jfinal.kit.StrKit;
import com.xixudi.htmlto.common.*;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.List;

/**
 * luojiji 爬取并生成chm
 * @author YangZheng 328170112@qq.com
 * @date 2019-09-03 9:54
 */
public class Luojisw {
    private static String url="http://www.luojiji.com/forum-LJSWW-1.html";
    private static String baseUrl="http://www.luojiji.com/";
    private static String projectName="luojisw";
    private static String basePath= SomeKit.getPath(projectName);

    static class LuojiswDownNode extends DownNode {
        LuojiswDownNode(String url, String type, boolean cover) {
            super(url, type, cover);
        }

        LuojiswDownNode(String url, String type) {
            super(url, type,false);
        }

        @Override
        protected String makeUrl() {
            if(url.startsWith("http")) {

            } else if(url.startsWith("/")) {
                if(url.startsWith("//")) url = "http:"+url;
                else url = baseUrl+url;
            } else {
                url = baseUrl+url;
            }
            return url;
        }

        @Override
        protected String makeNewFilePath() {
            return basePath + "/source/" + ("html".equals(type) ? "" : type + "/") + name;
        }
    }

    public static void downPage(String url) {
        try {
            LuojiswDownNode page = new LuojiswDownNode(url, "html", true);
            Document doc = page.getDoc();

            doc.select("base,.head_y,.c_b,.yeei_ft,.c_yt,#pgt,.mtm,#f_pst,.wp.mtn,#postlist>table,i").remove();
            Element table = doc.select("#postlist>div>table").get(0);
            doc.select(".pi").remove();
            doc.select("#postlist>div>table>tbody>tr:not(:nth-child(1))").remove();
            doc.select("#postlist>div>table>tbody>tr>td:nth-child(1)").remove();
            doc.select("td>script").remove();

            Elements csss = doc.select("link[rel=stylesheet]");
            for (Element css : csss) {
                LuojiswDownNode n = new LuojiswDownNode(css.attr("href"), "css");
                n.saveToLocal();
                css.attr("href","./css/"+n.getName());
            }
            doc.select("link[href]:last-child").after("<link rel=\"stylesheet\" type=\"text/css\" href=\"./css/fix.css\">");

            Elements jss = doc.select("script[src]");
            for (Element js : jss) {
                String src = js.attr("src");
                if(src.startsWith("http://discuz.gtimg.cn/")) {
                    js.remove();
                    continue;
                }
                LuojiswDownNode n = new LuojiswDownNode(src, "js");
                js.attr("src", "./js/" + n.getName());
                n.saveToLocal();
            }
            Elements imgs = doc.select("img[src]");
            for (Element img : imgs) {
                String file = img.attr("file");
                LuojiswDownNode n;
                if(StrKit.notBlank(file)) {
                    n = new LuojiswDownNode(file, "img");
                    img.removeAttr("zoomfile");
                    img.removeAttr("file");
                } else {
                    n = new LuojiswDownNode(img.attr("src"), "img");
                }
                n.saveToLocal();
                img.attr("src","./img/"+n.getName());
            }

            doc.select(".pcb>a").replaceAll(e->{
                if(e.attr("href").startsWith(baseUrl)) {
                    e.attr("href","source/"+e.attr("href").substring(baseUrl.length()));
                }
                return e;
            });
            page.saveDocToLocal();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    public static ArrayList<ContentsNode> downResource(){
        ArrayList<ContentsNode> nodes = new ArrayList<>();
        try {
            Document doc = Jsoup.connect(url).get();
            int maxPage = Integer.parseInt(doc.select("#fd_page_bottom a.last").get(0).text().substring(4));
            for (int i = 1; i <= maxPage; i++) {
                doc = Jsoup.connect(baseUrl+"forum-LJSWW-"+i+".html").get();
                Elements menus = doc.select("#threadlisttableid th a.xst");
                for (Element menu : menus) {
                    String href = menu.attr("href");
                    nodes.add(ContentsNode.newTopic(menu.text(),href.startsWith(baseUrl)?href.substring(baseUrl.length()):href));
                    downPage(href);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        Collections.reverse(nodes);
        return nodes;
    }
    private static void appendPage(StringBuilder sb,List<ContentsNode> nodes,boolean pageBreak){
        for (ContentsNode node : nodes) {
            if(StrKit.notBlank(node.getPath())) {
                try {
                    Document doc = Jsoup.parse(new File(basePath + "/source/" + node.getPath()), "utf8");
                    Element content = doc.selectFirst("#postlist>div>table>tbody>tr table");
                    Element font = content.selectFirst("font");
                    font.tagName("h0");
                    sb.append(content.outerHtml()
                                     .replace("<br> 来源:得到","")
                                     .replace("<br> 和你一起终身学习，这里是<a href=\"http://www.luojiji.com\" target=\"_blank\" class=\"relatedlink\">罗辑思维</a>。",""));
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if(node.getChildren()!=null) {
                appendPage(sb,node.getChildren(),false);
            }
        }
    }
    private static String makePageStr(List<ContentsNode> nodes,boolean pageBreak){
        StringBuilder sb = new StringBuilder();
        appendPage(sb,nodes,pageBreak);
        return sb.toString();
    }
    public static void main(String[] args) throws IOException {
        ArrayList<ContentsNode> contentsDownNodes = downResource();
        String today = new SimpleDateFormat("yyyyMMdd").format(new Date());
        //生成chm
        MakeChm.createChm(projectName, contentsDownNodes, "index.html", "罗辑思维@"+today, true);
        //为生成pdf,拼接完整html
        String pageStr = makePageStr(ContentsNode.makeTreeData(contentsDownNodes), true);
        String html = SomeKit.renderTemplate(projectName,"temp.html",pageStr);
        //为pdf准备h1,h2的目录条目
        html = html.replaceAll("(</?h)2","$13")
                   .replaceAll("(</?h)1","$12")
                   .replaceAll("(</?h)0","$11");
        String allHtmlPath = basePath + "/source/all.html";
        Files.write(new File(allHtmlPath).toPath(),html.getBytes("utf8"));
        MakePDF.htmlToPdf(allHtmlPath, basePath+"/罗辑思维@"+today+".pdf");
    }
}